---
title: "Chapter II Regressions 2.15"
author: "Jonah Simon"
date: "2026-02-07"
output:
  word_document: default
  html_document:
    df_print: paged
---

```{r setup, include=FALSE}
# Show each chunk's source code in the knitted output unless a chunk overrides it.
knitr::opts_chunk$set(echo = TRUE)
```

```{r}
library(tidyverse)
library(scales)
library(cowplot)
```


```{r}
# Folder holding the thesis CSV exports. Change this single line when the data
# moves instead of editing every path below.
data_dir <- "C:/Users/jonah/Downloads/Thesis Datasets"

# Full Times Index sample; used below as the denominator for coverage shares.
nyt <- read.csv(file.path(data_dir, "Times Index.csv"))
# Question-level confirmation hearing data.
hearings <- read.csv(file.path(data_dir, "Questions Database Fullest.csv"))
# Times Index civil-rights stories; used below as the subissue numerator.
nyt_civil <- read.csv(file.path(data_dir, "Times Index Civil Rights.csv"))
# Supreme Court cases coded by major topic and subtopic.
scotus <- read.csv(file.path(data_dir, "SCOTUS Major Issues.csv"))
```

```{r}
# Keep only rows flagged speak == 1; every later chunk works from this subset.
hearings <- filter(hearings, speak == 1)
```


```{r}
# Coverage window shared by every yearly series built in this document.
min_cov_year <- 1946L
max_cov_year <- 2010L
all_years <- seq.int(min_cov_year, max_cov_year)

# First hearing year for which a one-year lag of the coverage series exists.
lag1_hearing_year_min <- min_cov_year + 1
# NOTE(review): not referenced anywhere else in the visible file -- confirm
# whether the 10-year-average correlations were meant to start at this year.
avg10_hearing_year_min <- min_cov_year + 10

# Civil-rights subissue codes and the labels used in the results tables.
civil_rights_subissues <- tribble(
  ~subissue, ~label,
  201,       "Minority Discrimination",
  202,       "Gender & Sexual Orientation Discrimination",
  204,       "Age Discrimination",
  205,       "Handicap or Disease Discrimination",
  206,       "Voting Rights",
  207,       "Freedom of Speech & Religion",
  208,       "Right to Privacy",
  209,       "Anti-Government Activities",
  20081,     "Abortion"
)

# Share of each hearing's questions that carry a given subissue code.
#
# hearings_df   : question-level data with columns speak, year, order,
#                 subissue and itemsb1..itemsb5.
# subissue_code : code to look for; compared as text against all six code
#                 columns, so a question counts if any of them matches.
#
# Returns one row per (order, year) with total_q (all questions), q_sub
# (matching questions, 0 when none) and q_share = q_sub / total_q.
hearing_q_share <- function(hearings_df, subissue_code) {
  code_cols <- c("subissue", "itemsb1", "itemsb2", "itemsb3", "itemsb4", "itemsb5")

  # Hearings start one year after the coverage window so a lagged value exists.
  questions <- hearings_df |>
    filter(speak == 1) |>
    mutate(year = as.integer(year)) |>
    filter(!is.na(year), year >= lag1_hearing_year_min, year <= max_cov_year)

  per_hearing <- questions |>
    group_by(order, year) |>
    summarise(total_q = n(), .groups = "drop")

  matched <- questions |>
    filter(
      if_any(
        all_of(code_cols),
        \(col) !is.na(col) & as.character(col) == as.character(subissue_code)
      )
    ) |>
    group_by(order, year) |>
    summarise(q_sub = n(), .groups = "drop")

  # Hearings without a single matching question are kept with a count of 0.
  per_hearing |>
    left_join(matched, by = c("order", "year")) |>
    mutate(
      q_sub = coalesce(q_sub, 0L),
      q_share = if_else(total_q > 0, q_sub / total_q, NA_real_)
    )
}

# Trailing mean of the values that precede each position (current value excluded).
#
# x       : numeric vector ordered in time (one element per year).
# window  : maximum number of preceding elements to average (default 10).
# min_obs : minimum number of preceding positions required; positions with
#           fewer predecessors get NA. The default of 1 averages whatever
#           history exists, so early positions use a shorter window.
#
# Returns a double vector the same length as x. NAs inside the window are
# dropped before averaging; the first element is always NA.
lag_10yr_avg <- function(x, window = 10L, min_obs = 1L) {
  stopifnot(
    length(window) == 1L, window >= 1,
    length(min_obs) == 1L, min_obs >= 1
  )
  vapply(seq_along(x), function(i) {
    # Up to `window` positions immediately before i.
    prior <- utils::tail(seq_len(i - 1L), window)
    if (length(prior) < min_obs) NA_real_ else mean(x[prior], na.rm = TRUE)
  }, numeric(1))
}

# Yearly Times Index coverage share for one subissue, as lagged predictors.
#
# nyt_all_df    : all stories; rows per year form the denominator.
# nyt_civil_df  : coded stories; rows matching the code form the numerator.
# subissue_code : code to match in `code_col` (compared as text).
# code_col      : name of the code column in nyt_civil_df.
# include_codes : optional extra codes counted together with subissue_code.
#
# Returns one row per year in all_years with lag1 (previous year's share) and
# avg10 (mean share over the preceding years, see lag_10yr_avg).
build_nyt_share_lag_and_avg10 <- function(nyt_all_df,
                                         nyt_civil_df,
                                         subissue_code,
                                         code_col = "subissue",
                                         include_codes = NULL) {

  match_codes <- as.character(subissue_code)
  if (!is.null(include_codes)) {
    match_codes <- unique(as.character(c(subissue_code, include_codes)))
  }

  # Restrict a data frame to rows whose year falls inside the coverage window.
  in_window <- function(df) {
    df |>
      mutate(year = as.integer(year)) |>
      filter(!is.na(year), year >= min_cov_year, year <= max_cov_year)
  }

  stories_per_year <- count(in_window(nyt_all_df), year, name = "nyt_total")

  matches_per_year <- in_window(nyt_civil_df) |>
    filter(
      !is.na(.data[[code_col]]),
      as.character(.data[[code_col]]) %in% match_codes
    ) |>
    count(year, name = "n")

  # Years absent from either table count as zero; a zero denominator yields a
  # share of 0 rather than NA.
  tibble(year = all_years) |>
    left_join(stories_per_year, by = "year") |>
    left_join(matches_per_year, by = "year") |>
    mutate(
      nyt_total = coalesce(nyt_total, 0L),
      n = coalesce(n, 0L),
      share = if_else(nyt_total > 0, n / nyt_total, 0)
    ) |>
    arrange(year) |>
    mutate(
      lag1 = dplyr::lag(share, 1),
      avg10 = lag_10yr_avg(share)
    ) |>
    select(year, lag1, avg10)
}

# Yearly Supreme Court case counts for one subissue, as lagged predictors.
#
# df            : case-level data with a year column and the code column.
# subissue_code : code to match in `code_col` (compared as text).
# code_col      : name of the code column in df.
#
# Returns one row per year in all_years with lag1 (previous year's count) and
# avg10 (mean count over the preceding years). Years with no matching case
# count as 0.
build_scotus_count_lag_and_avg10 <- function(df, subissue_code, code_col = "subtopic") {

  yearly <- df |>
    mutate(year = as.integer(year)) |>
    filter(!is.na(year), year >= min_cov_year, year <= max_cov_year) |>
    filter(!is.na(.data[[code_col]])) |>
    filter(as.character(.data[[code_col]]) == as.character(subissue_code)) |>
    count(year, name = "n")

  tibble(year = all_years) |>
    left_join(yearly, by = "year") |>
    mutate(n = ifelse(is.na(n), 0L, n)) |>
    arrange(year) |>
    mutate(
      lag1 = dplyr::lag(n, 1),
      # Shared helper instead of an inline copy of the same rolling mean; this
      # also avoids calling n() while a column named `n` is in scope.
      avg10 = lag_10yr_avg(n)
    ) |>
    select(year, lag1, avg10)
}

# Spearman rank correlation on the pairwise-complete observations of x and y.
#
# Returns a list with rho, p and n (number of complete pairs). rho and p are
# NA when there are fewer than three pairs or either variable has no usable
# variation (sd not finite or equal to 0).
spearman_function <- function(x, y) {
  pairs <- tibble(x = x, y = y) |> filter(!is.na(x), !is.na(y))
  n_pairs <- nrow(pairs)
  no_estimate <- list(rho = NA_real_, p = NA_real_, n = n_pairs)

  if (n_pairs < 3) {
    return(no_estimate)
  }

  # TRUE when v has a finite, strictly positive standard deviation.
  varies <- function(v) {
    spread <- sd(v)
    is.finite(spread) && spread > 0
  }
  if (!(varies(pairs$x) && varies(pairs$y))) {
    return(no_estimate)
  }

  fit <- cor.test(pairs$x, pairs$y, method = "spearman")
  list(rho = unname(fit$estimate), p = fit$p.value, n = n_pairs)
}

# Correlate per-hearing question share for one subissue with four salience
# measures: Times Index share (1-year lag, prior-years average) and Supreme
# Court case count (1-year lag, prior-years average).
#
# Returns a one-row tibble with rho, p and n for each of the four pairings
# plus n_hearing_rows, the number of hearing rows entering the join.
hearings_spearman <- function(subissue_code,
                                    hearings_df,
                                    nyt_all_df,
                                    nyt_civil_df,
                                    scotus_df,
                                    nyt_code_col = "subissue",
                                    scotus_code_col = "subtopic") {

  question_share <- hearing_q_share(hearings_df, subissue_code)

  # For code 208 the Times Index numerator also counts stories coded 20081.
  extra_nyt_codes <- NULL
  if (as.character(subissue_code) == "208") {
    extra_nyt_codes <- c(20081)
  }

  nyt_predictors <- build_nyt_share_lag_and_avg10(
    nyt_all_df = nyt_all_df,
    nyt_civil_df = nyt_civil_df,
    subissue_code = subissue_code,
    code_col = nyt_code_col,
    include_codes = extra_nyt_codes
  )

  scotus_predictors <- build_scotus_count_lag_and_avg10(
    df = scotus_df,
    subissue_code = subissue_code,
    code_col = scotus_code_col
  )

  # Both predictor tables carry lag1/avg10; the second join disambiguates them
  # with the _nyt and _scotus suffixes.
  joined <- question_share |>
    left_join(nyt_predictors, by = "year") |>
    left_join(scotus_predictors, by = "year", suffix = c("_nyt", "_scotus"))

  against_share <- function(col) spearman_function(joined$q_share, joined[[col]])
  nyt_lag <- against_share("lag1_nyt")
  nyt_avg <- against_share("avg10_nyt")
  sc_lag <- against_share("lag1_scotus")
  sc_avg <- against_share("avg10_scotus")

  tibble(
    subissue = subissue_code,
    rho_Q_NYT_lag1 = nyt_lag$rho,
    p_Q_NYT_lag1 = nyt_lag$p,
    n_Q_NYT_lag1 = nyt_lag$n,
    rho_Q_NYT_avg10 = nyt_avg$rho,
    p_Q_NYT_avg10 = nyt_avg$p,
    n_Q_NYT_avg10 = nyt_avg$n,
    rho_Q_SCOTUS_lag1 = sc_lag$rho,
    p_Q_SCOTUS_lag1 = sc_lag$p,
    n_Q_SCOTUS_lag1 = sc_lag$n,
    rho_Q_SCOTUS_avg10 = sc_avg$rho,
    p_Q_SCOTUS_avg10 = sc_avg$p,
    n_Q_SCOTUS_avg10 = sc_avg$n,
    n_hearing_rows = nrow(joined)
  )
}


# Names of the code columns in the Times Index civil-rights file and the
# Supreme Court file.
nyt_code_col <- "subissue"
scotus_code_col <- "subtopic"

# One result row per civil-rights subissue.
results_panel <- lapply(civil_rights_subissues$subissue, \(code) {
  hearings_spearman(
    code,
    hearings,
    nyt,
    nyt_civil,
    scotus,
    nyt_code_col = nyt_code_col,
    scotus_code_col = scotus_code_col
  )
})

# Stack the rows and put the readable label next to the numeric code.
civil_results_raw <- results_panel |>
  bind_rows() |>
  left_join(civil_rights_subissues, by = "subissue") |>
  relocate(label, .after = subissue)


# Significance stars for a vector of p-values.
#
# Returns a character vector the same length as p: "***" for p < 0.001,
# "**" for p < 0.01, "*" for p < 0.05 and "" otherwise, including NA.
# Always character, also for zero-length input (nested ifelse() returned
# logical(0) there).
sig_stars <- function(p) {
  stars <- rep("", length(p))
  known <- !is.na(p)
  # Assign from the loosest to the strictest threshold so the strictest wins.
  stars[known & p < 0.05] <- "*"
  stars[known & p < 0.01] <- "**"
  stars[known & p < 0.001] <- "***"
  stars
}
# Format correlation coefficients to three decimals with significance stars.
#
# rho : numeric vector of coefficients.
# p   : matching p-values, passed to sig_stars().
#
# Returns a character vector; entries with a missing rho are NA_character_.
# The result is masked after formatting so the return type is character even
# for zero-length input (ifelse() returned logical(0) there).
fmt_rho <- function(rho, p) {
  out <- paste0(sprintf("%.3f", rho), sig_stars(p))
  out[is.na(rho)] <- NA_character_
  out
}
# Format p-values for the second table line: "(0.031)", or "(<.001)" when the
# value is below 0.001.
#
# Returns a character vector the same length as p; missing p-values stay
# NA_character_. Always character, also for zero-length input (ifelse()
# returned logical(0) there).
fmt_p <- function(p) {
  out <- rep(NA_character_, length(p))
  known <- which(!is.na(p))
  if (length(known) > 0L) {
    shown <- ifelse(p[known] < 0.001, "<.001", sprintf("%.3f", p[known]))
    out[known] <- paste0("(", shown, ")")
  }
  out
}

# Format every coefficient and p-value as text, one column per measure/stat.
subissue_cells <- civil_results_raw |>
  transmute(
    subissue_code = subissue,
    Subissue = label,
    NYT_lag1_rho = fmt_rho(rho_Q_NYT_lag1, p_Q_NYT_lag1),
    NYT_lag1_p = fmt_p(p_Q_NYT_lag1),
    NYT_avg10_rho = fmt_rho(rho_Q_NYT_avg10, p_Q_NYT_avg10),
    NYT_avg10_p = fmt_p(p_Q_NYT_avg10),
    SCOTUS_lag1_rho = fmt_rho(rho_Q_SCOTUS_lag1, p_Q_SCOTUS_lag1),
    SCOTUS_lag1_p = fmt_p(p_Q_SCOTUS_lag1),
    SCOTUS_avg10_rho = fmt_rho(rho_Q_SCOTUS_avg10, p_Q_SCOTUS_avg10),
    SCOTUS_avg10_p = fmt_p(p_Q_SCOTUS_avg10)
  )

# Reshape to two lines per subissue: the coefficient line (marked with rho)
# followed by a p-value line whose label cells are blank.
subissue_results_twoline <- subissue_cells |>
  pivot_longer(
    cols = !c(subissue_code, Subissue),
    names_to = c("Measure", "Stat"),
    names_pattern = "^(.*)_(rho|p)$",
    values_to = "value"
  ) |>
  pivot_wider(names_from = Measure, values_from = value) |>
  arrange(subissue_code, factor(Stat, levels = c("rho", "p"))) |>
  mutate(
    Subissue = ifelse(Stat == "p", "", Subissue),
    Stat = ifelse(Stat == "rho", "ρ", "")
  ) |>
  select(Subissue, Stat, everything(), -subissue_code) |>
  rename(
    `Question Share x Times Index (1-yr lag)` = NYT_lag1,
    `Question Share x Times Index (10-yr avg)` = NYT_avg10,
    `Question Share x SCOTUS (1-yr lag)` = SCOTUS_lag1,
    `Question Share x SCOTUS (10-yr avg)` = SCOTUS_avg10
  )

print(subissue_results_twoline)

# Six columns: two left-aligned label columns, four centred measure columns.
knitr::kable(subissue_results_twoline, align = "llcccc")


```

```{r}
# Re-state the hearing-year bounds so this chunk can run on its own once
# min_cov_year is defined.
lag1_hearing_year_min <- min_cov_year + 1
avg10_hearing_year_min <- min_cov_year + 10

# Major issue codes and the labels used in the results tables.
major_issues <- tribble(
  ~issue, ~label,
  1,      "Macroeconomics",
  2,      "Civil Rights",
  3,      "Health",
  4,      "Agriculture",
  5,      "Labor, Immigration, and Employment",
  6,      "Education",
  7,      "Environment",
  8,      "Energy",
  9,      "Immigration",
  10,     "Transportation",
  12,     "Law, Crime, and Family Issues",
  13,     "Social Welfare",
  14,     "Community Development and Housing",
  15,     "Banking, Finance and Domestic Commerce",
  16,     "Defense",
  17,     "Space, Science, Technology, and Communications",
  18,     "Foreign Trade",
  19,     "International Affairs",
  20,     "Federal Government Operations",
  21,     "Public Lands and Water Management"
)

# Share of each hearing's questions coded to one major issue.
#
# hearings_df : question-level data with columns speak, year, order, issue.
# issue_code  : major issue code, compared as an integer against `issue`.
#
# Returns one row per (order, year) with total_q, q_issue (0 when no question
# matches) and q_share = q_issue / total_q.
hearing_q_share_issue <- function(hearings_df, issue_code) {

  questions <- hearings_df |>
    filter(speak == 1) |>
    mutate(year = as.integer(year), issue = as.integer(issue)) |>
    filter(
      !is.na(year),
      year >= lag1_hearing_year_min,
      year <= max_cov_year
    )

  per_hearing <- questions |>
    group_by(order, year) |>
    summarise(total_q = n(), .groups = "drop")

  on_issue <- questions |>
    filter(!is.na(issue), issue == as.integer(issue_code)) |>
    group_by(order, year) |>
    summarise(q_issue = n(), .groups = "drop")

  # Hearings with no question on the issue are kept with a count of 0.
  per_hearing |>
    left_join(on_issue, by = c("order", "year")) |>
    mutate(
      q_issue = coalesce(q_issue, 0L),
      q_share = if_else(total_q > 0, q_issue / total_q, NA_real_)
    )
}

# Yearly Times Index coverage share for one major issue, as lagged predictors.
#
# NOTE(review): this replaces the five-argument function of the same name
# defined earlier in this file; re-running the earlier subissue chunk after
# this one would call this two-argument version.
#
# nyt_df     : story-level data with year and majortopic columns.
# issue_code : major topic code, compared as an integer.
#
# Returns one row per year in all_years with lag1 (previous year's share) and
# avg10 (mean share over the preceding years, see lag_10yr_avg).
build_nyt_share_lag_and_avg10 <- function(nyt_df, issue_code) {

  stories_per_year <- nyt_df |>
    mutate(year = as.integer(year)) |>
    filter(!is.na(year), year >= min_cov_year, year <= max_cov_year) |>
    count(year, name = "nyt_total")

  issue_per_year <- nyt_df |>
    mutate(year = as.integer(year), majortopic = as.integer(majortopic)) |>
    filter(
      !is.na(year), year >= min_cov_year, year <= max_cov_year,
      !is.na(majortopic), majortopic == as.integer(issue_code)
    ) |>
    count(year, name = "n_issue")

  # Missing years count as zero; a zero denominator yields a share of 0.
  tibble(year = all_years) |>
    left_join(stories_per_year, by = "year") |>
    left_join(issue_per_year, by = "year") |>
    mutate(
      nyt_total = coalesce(nyt_total, 0L),
      n_issue = coalesce(n_issue, 0L),
      share = if_else(nyt_total > 0, n_issue / nyt_total, 0)
    ) |>
    arrange(year) |>
    mutate(
      lag1 = dplyr::lag(share, 1),
      avg10 = lag_10yr_avg(share)
    ) |>
    select(year, lag1, avg10)
}

# Yearly Supreme Court case counts for one major issue, as lagged predictors.
#
# NOTE(review): this replaces the subissue-level function of the same name
# defined earlier in this file.
#
# df         : case-level data with year and majortopic columns.
# issue_code : major topic code, compared as an integer.
#
# Returns one row per year in all_years with lag1 (previous year's count) and
# avg10 (mean count over the preceding years). Years with no matching case
# count as 0.
build_scotus_count_lag_and_avg10 <- function(df, issue_code) {

  yearly <- df |>
    mutate(year = as.integer(year), majortopic = as.integer(majortopic)) |>
    filter(!is.na(year), year >= min_cov_year, year <= max_cov_year) |>
    filter(!is.na(majortopic), majortopic == as.integer(issue_code)) |>
    count(year, name = "n")

  tibble(year = all_years) |>
    left_join(yearly, by = "year") |>
    mutate(n = ifelse(is.na(n), 0L, n)) |>
    arrange(year) |>
    mutate(
      lag1 = dplyr::lag(n, 1),
      # Shared helper instead of an inline copy of the same rolling mean.
      avg10 = lag_10yr_avg(n)
    ) |>
    select(year, lag1, avg10)
}

# Correlate per-hearing question share for one major issue with Times Index
# share and Supreme Court case counts (1-year lag and prior-years average).
#
# Returns a one-row tibble with rho, p and n for each of the four pairings.
run_issue_model <- function(issue_code, hearings_df, nyt_df, scotus_df) {

  question_share <- hearing_q_share_issue(hearings_df, issue_code)
  nyt_predictors <- build_nyt_share_lag_and_avg10(nyt_df, issue_code)
  scotus_predictors <- build_scotus_count_lag_and_avg10(scotus_df, issue_code)

  # The second join suffixes the clashing lag1/avg10 columns by source.
  joined <- question_share |>
    left_join(nyt_predictors, by = "year") |>
    left_join(scotus_predictors, by = "year", suffix = c("_nyt", "_scotus"))

  against_share <- function(col) spearman_function(joined$q_share, joined[[col]])
  nyt_lag <- against_share("lag1_nyt")
  nyt_avg <- against_share("avg10_nyt")
  sc_lag <- against_share("lag1_scotus")
  sc_avg <- against_share("avg10_scotus")

  tibble(
    issue = issue_code,
    rho_Q_NYT_lag1 = nyt_lag$rho,
    p_Q_NYT_lag1 = nyt_lag$p,
    n_Q_NYT_lag1 = nyt_lag$n,
    rho_Q_NYT_avg10 = nyt_avg$rho,
    p_Q_NYT_avg10 = nyt_avg$p,
    n_Q_NYT_avg10 = nyt_avg$n,
    rho_Q_SCOTUS_lag1 = sc_lag$rho,
    p_Q_SCOTUS_lag1 = sc_lag$p,
    n_Q_SCOTUS_lag1 = sc_lag$n,
    rho_Q_SCOTUS_avg10 = sc_avg$rho,
    p_Q_SCOTUS_avg10 = sc_avg$p,
    n_Q_SCOTUS_avg10 = sc_avg$n
  )
}

# One result row per major issue, labelled and sorted by issue code.
issue_results_raw <- bind_rows(lapply(
  major_issues$issue,
  function(code) run_issue_model(
    issue_code  = code,
    hearings_df = hearings,
    nyt_df      = nyt,
    scotus_df   = scotus
  )
)) |>
  left_join(major_issues, by = "issue") |>
  arrange(issue) |>
  relocate(label, .after = issue)

# Two display lines per issue: formatted coefficients, then p-values.
issue_results_twoline <- issue_results_raw |>
  transmute(
    issue_code = issue,
    Issue = label,

    NYT_lag1_rho     = fmt_rho(rho_Q_NYT_lag1,  p_Q_NYT_lag1),
    NYT_lag1_p       = fmt_p(p_Q_NYT_lag1),

    NYT_avg10_rho    = fmt_rho(rho_Q_NYT_avg10, p_Q_NYT_avg10),
    NYT_avg10_p      = fmt_p(p_Q_NYT_avg10),

    SCOTUS_lag1_rho  = fmt_rho(rho_Q_SCOTUS_lag1,  p_Q_SCOTUS_lag1),
    SCOTUS_lag1_p    = fmt_p(p_Q_SCOTUS_lag1),

    SCOTUS_avg10_rho = fmt_rho(rho_Q_SCOTUS_avg10, p_Q_SCOTUS_avg10),
    SCOTUS_avg10_p   = fmt_p(p_Q_SCOTUS_avg10)
  ) |>
  pivot_longer(
    cols = -c(issue_code, Issue),
    names_to = c("Measure", "Stat"),
    names_pattern = "^(.*)_(rho|p)$",
    values_to = "value"
  ) |>
  pivot_wider(names_from = Measure, values_from = value) |>
  arrange(issue_code, factor(Stat, levels = c("rho", "p"))) |>
  mutate(
    # Blank the label on the p-value line so each issue name appears once.
    Issue = ifelse(Stat == "p", "", Issue),
    Stat  = ifelse(Stat == "rho", "ρ", "")
  ) |>
  select(-issue_code) |>
  relocate(Stat, .after = Issue)

issue_results_twoline <- issue_results_twoline |>
  rename(
    `Question Share x Times Index (1-yr lag)` = NYT_lag1,
    `Question Share x Times Index (10-yr avg)` = NYT_avg10,
    `Question Share x SCOTUS (1-yr lag)` = SCOTUS_lag1,
    `Question Share x SCOTUS (10-yr avg)` = SCOTUS_avg10
  )

print(issue_results_twoline)

# The table has six columns (Issue, Stat and four measures), so the alignment
# string needs six letters; "llcc" covered only the first four.
knitr::kable(
  issue_results_twoline,
  align = "llcccc"
)


```

```{r}
# Yearly share of rows coded to one issue, plus its prior-years average.
#
# df         : row-level data with a year column and the code column.
# issue_code : code to match, compared as an integer.
# code_col   : name of the code column in df.
#
# Returns one row per year in all_years with share (matching rows / all rows
# that year, 0 when the year has no rows) and avg10 (see lag_10yr_avg).
build_share_and_avg10 <- function(df, issue_code, code_col = "majortopic") {

  rows_per_year <- df |>
    mutate(year = as.integer(year)) |>
    filter(!is.na(year), year >= min_cov_year, year <= max_cov_year) |>
    count(year, name = "total_year")

  hits_per_year <- df |>
    mutate(
      year = as.integer(year),
      code = as.integer(.data[[code_col]])
    ) |>
    filter(!is.na(year), year >= min_cov_year, year <= max_cov_year) |>
    filter(!is.na(code), code == as.integer(issue_code)) |>
    count(year, name = "n_issue")

  tibble(year = all_years) |>
    left_join(rows_per_year, by = "year") |>
    left_join(hits_per_year, by = "year") |>
    mutate(
      total_year = coalesce(total_year, 0L),
      n_issue = coalesce(n_issue, 0L),
      share = if_else(total_year > 0, n_issue / total_year, 0)
    ) |>
    arrange(year) |>
    mutate(avg10 = lag_10yr_avg(share)) |>
    select(year, share, avg10)
}

# Correlate Times Index and Supreme Court attention to one major issue:
# same-year shares, and the prior-years averages of those shares.
#
# Returns a one-row tibble with rho, p and n for both pairings.
run_nyt_scotus_sameyear <- function(issue_code, nyt_df, scotus_df) {

  by_source <- left_join(
    build_share_and_avg10(nyt_df, issue_code, code_col = "majortopic"),
    build_share_and_avg10(scotus_df, issue_code, code_col = "majortopic"),
    by = "year",
    suffix = c("_nyt", "_scotus")
  )

  same_year <- spearman_function(by_source$share_nyt, by_source$share_scotus)
  trailing <- spearman_function(by_source$avg10_nyt, by_source$avg10_scotus)

  tibble(
    issue = issue_code,
    rho_sameyear = same_year$rho,
    p_sameyear = same_year$p,
    n_sameyear = same_year$n,
    rho_avg10 = trailing$rho,
    p_avg10 = trailing$p,
    n_avg10 = trailing$n
  )
}

# One result row per major issue, labelled and sorted by issue code.
nyt_scotus_raw <- bind_rows(lapply(
  major_issues$issue,
  function(code) run_nyt_scotus_sameyear(
    issue_code = code,
    nyt_df = nyt,
    scotus_df = scotus
  )
)) |>
  left_join(major_issues, by = "issue") |>
  arrange(issue) |>
  relocate(label, .after = issue)

# Two display lines per issue: formatted coefficients, then p-values.
nyt_scotus_twoline <- nyt_scotus_raw |>
  transmute(
    issue_code = issue,
    Issue = label,

    sameyear_rho = fmt_rho(rho_sameyear, p_sameyear),
    sameyear_p   = fmt_p(p_sameyear),

    avg10_rho    = fmt_rho(rho_avg10, p_avg10),
    avg10_p      = fmt_p(p_avg10)
  ) |>
  pivot_longer(
    cols = -c(issue_code, Issue),
    names_to = c("Measure", "Stat"),
    names_pattern = "^(.*)_(rho|p)$",
    values_to = "value"
  ) |>
  pivot_wider(names_from = Measure, values_from = value) |>
  arrange(issue_code, factor(Stat, levels = c("rho", "p"))) |>
  mutate(
    # Blank the label on the p-value line so each issue name appears once.
    Issue = ifelse(Stat == "p", "", Issue),
    Stat  = ifelse(Stat == "rho", "ρ", "")
  ) |>
  select(-issue_code) |>
  relocate(Stat, .after = Issue)

# The first measure correlates shares from the same year (no lag is applied
# in run_nyt_scotus_sameyear), so it is labelled "same year", not "1-yr lag".
nyt_scotus_twoline <- nyt_scotus_twoline |>
  rename(
    `Times Index x SCOTUS (same year)` = sameyear,
    `Times Index x SCOTUS (10-yr avg)` = avg10
  )

print(nyt_scotus_twoline)

# Four columns: Issue, Stat and the two measures.
knitr::kable(
  nyt_scotus_twoline,
  align = "llcc"
)


```

```{r}
# Civil-rights subissue codes with the long-form labels used in this table.
subissues <- tribble(
  ~subissue, ~label,
  201,       "Ethnic Minority and Racial Group Discrimination",
  202,       "Gender and Sexual Orientation Discrimination",
  204,       "Age Discrimination",
  205,       "Handicap or Disease Discrimination",
  206,       "Voting Rights and Issues",
  207,       "Freedom of Speech & Religion",
  208,       "Right to Privacy and Access to Government Information",
  209,       "Anti-Government Activities",
  20081,     "Abortion"
)

# Total Times Index stories per year inside the coverage window; shared
# denominator for the subissue shares built below.
nyt_in_window <- mutate(nyt, year = as.integer(year)) |>
  filter(!is.na(year), year >= min_cov_year, year <= max_cov_year)
nyt_total_by_year <- count(nyt_in_window, year, name = "nyt_total")

# Yearly Times Index share for one civil-rights subissue, plus its
# prior-years average. The denominator is the global nyt_total_by_year.
#
# nyt_civil_df  : coded stories with year and subissue columns.
# subissue_code : code to match as an integer; 208 also counts 20081.
#
# Returns one row per year in all_years with share and avg10.
build_nytcivil_share_and_avg10 <- function(nyt_civil_df, subissue_code) {

  wanted_codes <- if (as.integer(subissue_code) == 208L) {
    c(208L, 20081L)
  } else {
    as.integer(subissue_code)
  }

  hits_per_year <- nyt_civil_df |>
    mutate(year = as.integer(year), subissue = as.integer(subissue)) |>
    filter(
      !is.na(year),
      year >= min_cov_year,
      year <= max_cov_year,
      !is.na(subissue)
    ) |>
    filter(subissue %in% wanted_codes) |>
    count(year, name = "n_issue")

  # Missing years count as zero; a zero denominator yields a share of 0.
  tibble(year = all_years) |>
    left_join(nyt_total_by_year, by = "year") |>
    left_join(hits_per_year, by = "year") |>
    mutate(
      nyt_total = coalesce(nyt_total, 0L),
      n_issue = coalesce(n_issue, 0L),
      share = if_else(nyt_total > 0, n_issue / nyt_total, 0)
    ) |>
    arrange(year) |>
    mutate(avg10 = lag_10yr_avg(share)) |>
    select(year, share, avg10)
}

# Yearly share of Supreme Court cases coded to one subtopic, plus its
# prior-years average.
#
# scotus_df     : case-level data with year and subtopic columns.
# subissue_code : subtopic code, compared as an integer.
#
# Returns one row per year in all_years with share (0 when the year has no
# cases) and avg10.
build_scotus_share_and_avg10 <- function(scotus_df, subissue_code) {

  cases_per_year <- scotus_df |>
    mutate(year = as.integer(year)) |>
    filter(!is.na(year), year >= min_cov_year, year <= max_cov_year) |>
    count(year, name = "sc_total")

  hits_per_year <- scotus_df |>
    mutate(year = as.integer(year), subtopic = as.integer(subtopic)) |>
    filter(!is.na(year), year >= min_cov_year, year <= max_cov_year) |>
    filter(!is.na(subtopic), subtopic == as.integer(subissue_code)) |>
    count(year, name = "n_issue")

  tibble(year = all_years) |>
    left_join(cases_per_year, by = "year") |>
    left_join(hits_per_year, by = "year") |>
    mutate(
      sc_total = coalesce(sc_total, 0L),
      n_issue = coalesce(n_issue, 0L),
      share = if_else(sc_total > 0, n_issue / sc_total, 0)
    ) |>
    arrange(year) |>
    mutate(avg10 = lag_10yr_avg(share)) |>
    select(year, share, avg10)
}

# Correlate Times Index and Supreme Court attention to one civil-rights
# subissue: same-year shares, and the prior-years averages of those shares.
#
# Returns a one-row tibble keyed by the integer subissue code with rho, p and
# n for both pairings.
run_nytcivil_scotus_subissue <- function(subissue_code, nyt_civil_df, scotus_df) {

  by_source <- left_join(
    build_nytcivil_share_and_avg10(nyt_civil_df, subissue_code),
    build_scotus_share_and_avg10(scotus_df, subissue_code),
    by = "year",
    suffix = c("_nyt", "_scotus")
  )

  same_year <- spearman_function(by_source$share_nyt, by_source$share_scotus)
  trailing <- spearman_function(by_source$avg10_nyt, by_source$avg10_scotus)

  tibble(
    subissue = as.integer(subissue_code),
    rho_sameyear = same_year$rho,
    p_sameyear = same_year$p,
    n_sameyear = same_year$n,
    rho_avg10 = trailing$rho,
    p_avg10 = trailing$p,
    n_avg10 = trailing$n
  )
}

# One result row per civil-rights subissue, labelled and sorted by code.
nyt_scotus_sub_raw <- bind_rows(lapply(
  subissues$subissue,
  function(code) run_nytcivil_scotus_subissue(
    subissue_code = code,
    nyt_civil_df = nyt_civil,
    scotus_df = scotus
  )
)) |>
  left_join(subissues, by = "subissue") |>
  arrange(subissue) |>
  relocate(label, .after = subissue)

# Two display lines per subissue: formatted coefficients, then p-values.
nyt_scotus_sub_twoline <- nyt_scotus_sub_raw |>
  transmute(
    subissue_code = subissue,
    Subissue = label,

    sameyear_rho = fmt_rho(rho_sameyear, p_sameyear),
    sameyear_p   = fmt_p(p_sameyear),

    avg10_rho    = fmt_rho(rho_avg10, p_avg10),
    avg10_p      = fmt_p(p_avg10)
  ) |>
  pivot_longer(
    cols = -c(subissue_code, Subissue),
    names_to = c("Measure", "Stat"),
    names_pattern = "^(.*)_(rho|p)$",
    values_to = "value"
  ) |>
  pivot_wider(names_from = Measure, values_from = value) |>
  arrange(subissue_code, factor(Stat, levels = c("rho", "p"))) |>
  mutate(
    # Blank the label on the p-value line so each subissue name appears once.
    Subissue = ifelse(Stat == "p", "", Subissue),
    Stat     = ifelse(Stat == "rho", "ρ", "")
  ) |>
  select(-subissue_code) |>
  relocate(Stat, .after = Subissue)

# The first measure correlates shares from the same year (no lag is applied
# in run_nytcivil_scotus_subissue), so it is labelled "same year". The second
# header previously read "Timex Index".
nyt_scotus_sub_twoline <- nyt_scotus_sub_twoline |>
  rename(
    `Times Index x SCOTUS (same year)` = sameyear,
    `Times Index x SCOTUS (10-yr avg)` = avg10
  )

print(nyt_scotus_sub_twoline)

# Four columns: Subissue, Stat and the two measures.
knitr::kable(
  nyt_scotus_sub_twoline,
  align = "llcc"
)


```

```{r, fig.width=10, fig.height=14}
library(cowplot)

# Subissues shown in the figure, with short facet labels (title-cased later).
civil_subissues <- tribble(
  ~subissue, ~label,
  201,       "Minority discrimination",
  202,       "Gender / sexual orientation",
  204,       "Age discrimination",
  205,       "Handicap / disease discrimination",
  206,       "Voting rights",
  207,       "Speech & religion",
  208,       "Right to privacy",
  209,       "Anti-government activities"
)

# Plot window; all_years is rebuilt from these bounds for the figure.
year_min <- 1946L
year_max <- 2010L
all_years <- seq.int(year_min, year_max)

# Total Times Index stories per year inside the plot window.
nyt_total_by_year <- count(
  filter(
    mutate(nyt, year = as.integer(year)),
    !is.na(year), year >= year_min, year <= year_max
  ),
  year,
  name = "nyt_total"
)


# Yearly Times Index share for one subissue over the plot window.
# Reads the globals nyt_civil, nyt_total_by_year, all_years and the year
# bounds. Code 208 also counts stories coded 20081.
#
# Returns a tibble with columns year and share (0 when the year has no stories).
nyt_share_series <- function(subissue_code) {

  wanted_codes <- if (as.integer(subissue_code) == 208L) {
    c(208L, 20081L)
  } else {
    as.integer(subissue_code)
  }

  hits_per_year <- nyt_civil |>
    mutate(year = as.integer(year), subissue = as.integer(subissue)) |>
    filter(!is.na(year), year >= year_min, year <= year_max, !is.na(subissue)) |>
    filter(subissue %in% wanted_codes) |>
    count(year, name = "n")

  tibble(year = all_years) |>
    left_join(nyt_total_by_year, by = "year") |>
    left_join(hits_per_year, by = "year") |>
    mutate(
      nyt_total = coalesce(nyt_total, 0L),
      n = coalesce(n, 0L),
      share = if_else(nyt_total > 0, n / nyt_total, 0)
    ) |>
    select(year, share)
}


# Yearly Supreme Court case count for one subtopic over the plot window.
# Reads the globals scotus, all_years and the year bounds.
#
# Returns a tibble with columns year and cases (0 for years without a match).
scotus_count_series <- function(subissue_code) {

  cases_per_year <- scotus |>
    mutate(year = as.integer(year), subtopic = as.integer(subtopic)) |>
    filter(!is.na(year), year >= year_min, year <= year_max) |>
    filter(!is.na(subtopic), subtopic == as.integer(subissue_code)) |>
    count(year, name = "cases")

  tibble(year = all_years) |>
    left_join(cases_per_year, by = "year") |>
    mutate(cases = coalesce(cases, 0L)) |>
    select(year, cases)
}

# One row per (subissue, year) with the Times Index share and the case count.
panel <- bind_rows(lapply(seq_len(nrow(civil_subissues)), function(i) {

  code <- civil_subissues$subissue[i]
  lab  <- civil_subissues$label[i]

  nyt_s    <- nyt_share_series(code)
  scotus_s <- scotus_count_series(code)

  tibble(
    year = all_years,
    subissue = lab,
    nyt_share = nyt_s$share,
    scotus_cases = scotus_s$cases
  )
}))

# Facet titles are the title-cased labels; the group vectors below must match
# these strings exactly.
panel <- panel |>
  mutate(subissue = tools::toTitleCase(subissue))

max_share_global <- max(panel$nyt_share, na.rm = TRUE)
max_cases_global <- max(panel$scotus_cases, na.rm = TRUE)

# Scale factor that maps case counts onto the share axis so both series fit
# one panel; falls back to 1 when there are no cases to scale by.
sf <- if (is.finite(max_cases_global) && max_cases_global > 0) {
  max_share_global / max_cases_global
} else {
  1
}
sf <- as.numeric(sf)

panel_long <- bind_rows(
  panel |> transmute(year, subissue, series = "NYT coverage share", plot_value = nyt_share),
  panel |> transmute(year, subissue, series = "SCOTUS cases",        plot_value = scotus_cases * sf)
) |>
  mutate(series = factor(series, levels = c("NYT coverage share", "SCOTUS cases")))

# Upper y limit at the 99th percentile so a few extreme values do not
# flatten every panel.
y_max_focus <- as.numeric(quantile(panel_long$plot_value, 0.99, na.rm = TRUE))

# Years with at least one hearing, drawn as vertical reference lines.
hearings_years <- hearings |>
  mutate(year = as.integer(year)) |>
  filter(!is.na(year), year >= year_min, year <= year_max) |>
  distinct(year)

bottom_group <- c(
  "Minority Discrimination",
  "Voting Rights",
  "Speech & Religion"
)

# Single definition of top_group. An earlier duplicate used
# "Right To Privacy" and was overwritten by this one before any use.
top_group <- c(
  "Gender / Sexual Orientation",
  "Age Discrimination",
  "Handicap / Disease Discrimination",
  "Right to Privacy",
  "Anti-Government Activities"
)

# Faceted trend plot for a set of subissues: smoothed Times Index share and
# rescaled Supreme Court case counts, with a vertical line per hearing year.
# Reads the globals panel_long, hearings_years, year_min, year_max, sf and
# y_max_focus.
#
# subissues   : facet labels (values of panel_long$subissue) to include.
# group_title : plot title.
# show_legend : place the legend on top (TRUE) or hide it (FALSE).
# ncol        : number of facet columns.
make_group_plot <- function(subissues, group_title, show_legend = FALSE, ncol = 2) {

  series_keys <- c("NYT coverage share", "SCOTUS cases", "Confirmation Hearings")
  series_colours <- c(
    "NYT coverage share"     = "#0072B2",
    "SCOTUS cases"           = "#D55E00",
    "Confirmation Hearings"  = "black"
  )
  series_labels <- c(
    "Times Index Coverage Share",
    "Supreme Court Case Count",
    "Confirmation Hearings"
  )
  legend_spot <- if (show_legend) "top" else "none"

  facet_data <- filter(panel_long, subissue %in% subissues)

  trend_layer <- geom_smooth(
    se = FALSE,
    method = "loess",
    span = 0.4,
    linewidth = 0.9
  )

  # Hearing years come from their own data frame, so this layer does not
  # inherit the plot-level aesthetics.
  hearing_layer <- geom_vline(
    data = hearings_years,
    aes(xintercept = year, color = "Confirmation Hearings"),
    inherit.aes = FALSE,
    linewidth = 0.3,
    alpha = 0.3
  )

  x_scale <- scale_x_continuous(
    limits = c(year_min, year_max),
    breaks = seq(1940, 2010, by = 10),
    minor_breaks = NULL
  )

  # The secondary axis divides by sf to show case counts in original units.
  y_scale <- scale_y_continuous(
    name = "Times Index Coverage Share",
    labels = percent_format(accuracy = 0.1),
    expand = expansion(mult = c(0, 0.02)),
    sec.axis = sec_axis(~ . / sf, name = "Supreme Court Case Count")
  )

  colour_scale <- scale_color_manual(
    name = "Series",
    breaks = series_keys,
    values = series_colours,
    labels = series_labels
  )

  legend_keys <- guides(
    color = guide_legend(
      override.aes = list(
        linewidth = c(1.1, 1.1, 0.4),
        alpha     = c(1,   1,   1)
      )
    )
  )

  ggplot(facet_data, aes(x = year, y = plot_value, color = series)) +
    trend_layer +
    hearing_layer +
    facet_wrap(~ subissue, ncol = ncol) +
    x_scale +
    y_scale +
    coord_cartesian(ylim = c(0, y_max_focus)) +
    colour_scale +
    legend_keys +
    labs(title = group_title, x = NULL) +
    theme_minimal(base_size = 11, base_family = "serif") +
    theme(
      panel.grid.minor = element_blank(),
      legend.position = legend_spot,
      strip.text = element_text(face = "bold"),
      plot.title = element_text(face = "bold", hjust = 0.5)
    )
}

# Build the top group once with a legend, only to extract that legend.
p_top_for_legend <- make_group_plot(
  subissues = top_group,
  group_title = "Salience-Responsive Subissues",
  show_legend = TRUE,
  ncol = 2
)
shared_legend <- get_legend(p_top_for_legend)

# Legend-free versions of both groups for the stacked figure.
p_top <- make_group_plot(
  subissues = top_group,
  group_title = "Salience-Responsive Subissues",
  show_legend = FALSE,
  ncol = 2
)
p_bottom <- make_group_plot(
  subissues = bottom_group,
  group_title = "Non-Salience-Responsive Subissues",
  show_legend = FALSE,
  ncol = 2
)

# Stack legend, top group and bottom group in a single column.
final_plot <- plot_grid(
  plotlist = list(shared_legend, p_top, p_bottom),
  ncol = 1,
  rel_heights = c(0.12, 1.2, 1)
)

print(final_plot)

```

```{r}
# Number of question rows per hearing (order, name, year) from the start of
# the coverage window onward. Uses min_cov_year instead of a literal 1946 and
# drops grouping explicitly so no regrouping message reaches the knitted output.
hearings |>
  filter(year >= min_cov_year) |>
  group_by(order, name, year) |>
  summarise(count = n(), .groups = "drop")
```







